`— title: “STA 222 Final Project” output: html_document date: “2022-12-03” —
library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(muhaz)
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
##
## mutate
library(tidyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggthemes)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
library(gridExtra)
# Loading in data
data(std)
std0 = std
# Checking for NA Values in data set
any(is.na(std))
## [1] FALSE
factor.cols <-
c(
"race",
# Race (W=white, B=black)
"marital",
# Marital status (D=divorced / separated, M=married, S=single)
"iinfct",
# Initial infection (1= gonorrhea, 2=chlamydia, 3=both)
"os12m",
# Oral sex within 12 months (1=yes, 0=no)
"os30d",
# Oral sex within 30 days (1=yes, 0=no)
"rs12m",
# Rectal sex within 12 months (1=yes, 0=no)
"rs30d",
# Rectal sex within 30 days (1=yes, 0=no)
"abdpain",
# Presence of abdominal pain (1=yes, 0=no)
"discharge",
# Sign of discharge (1=yes, 0=no)
"dysuria",
# Sign of dysuria (1=yes, 0=no)
"condom",
# Condom use (1=always, 2=sometime, 3=never)
"itch",
# Sign of itch (1=yes, 0=no)
"lesion",
# Sign of lesion (1=yes, 0=no)
"rash",
# Sign of rash (1=yes, 0=no)
"lymph",
# Sign of lymph (1=yes, 0=no)
"vagina",
# Involvement vagina at exam (1=yes, 0=no)
"dchexam",
# Discharge at exam (1=yes, 0=no)
"abnode" # Abnormal node at exam (1=yes, 0=no)
)
std[factor.cols] <- lapply(std[factor.cols], as.factor)
std$race <-
mapvalues(std$race,
from = c("W", "B"),
to = c("White", "Black"))
std$marital <-
mapvalues(
std$marital,
from = c("D", "M", "S"),
to = c("Divorced/Separated", "Married", "Single")
)
std$iinfct <-
mapvalues(
std$iinfct,
from = c("1", "2", "3"),
to = c("gonorrhea", "chlamydia", "both")
)
std$condom <-
mapvalues(
std$condom,
from = c("1", "2", "3"),
to = c("always", "sometime", "never")
)
# Building the Survival Object
infect <- std$iinfct
surv_object <- Surv(time = std$time, event = std$rinfct)
## Call:
## survdiff(formula = surv_object ~ iinfct, data = std)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## iinfct=gonorrhea 140 73 54.5 6.28042 7.50617
## iinfct=chlamydia 396 135 153.0 2.12201 3.81136
## iinfct=both 341 139 139.5 0.00166 0.00278
##
## Chisq= 8.5 on 2 degrees of freedom, p= 0.01
## Call:
## coxph(formula = surv_object ~ iinfct, data = std)
##
## n= 877, number of events= 347
##
## coef exp(coef) se(coef) z Pr(>|z|)
## iinfctchlamydia -0.4202 0.6569 0.1457 -2.884 0.00393 **
## iinfctboth -0.2980 0.7423 0.1450 -2.055 0.03984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia 0.6569 1.522 0.4937 0.8741
## iinfctboth 0.7423 1.347 0.5587 0.9863
##
## Concordance= 0.524 (se = 0.016 )
## Likelihood ratio test= 7.93 on 2 df, p=0.02
## Wald test = 8.37 on 2 df, p=0.02
## Score (logrank) test = 8.46 on 2 df, p=0.01
cox1 <-
coxph(
surv_object ~ iinfct + marital + race + os12m + os30d +
rs12m + rs30d + abdpain + discharge + dysuria + condom +
itch + lesion + rash + lymph + vagina + dchexam + abnode +
age + yschool + npartner,
data = std
)
summary(cox1)
## Call:
## coxph(formula = surv_object ~ iinfct + marital + race + os12m +
## os30d + rs12m + rs30d + abdpain + discharge + dysuria + condom +
## itch + lesion + rash + lymph + vagina + dchexam + abnode +
## age + yschool + npartner, data = std)
##
## n= 877, number of events= 347
##
## coef exp(coef) se(coef) z Pr(>|z|)
## iinfctchlamydia -0.334628 0.715604 0.149647 -2.236 0.02534 *
## iinfctboth -0.267515 0.765279 0.149987 -1.784 0.07449 .
## maritalMarried 0.055058 1.056602 0.431303 0.128 0.89842
## maritalSingle 0.408097 1.503953 0.295341 1.382 0.16704
## raceWhite -0.111462 0.894526 0.141327 -0.789 0.43030
## os12m1 -0.206151 0.813711 0.212028 -0.972 0.33091
## os30d1 -0.339983 0.711783 0.238657 -1.425 0.15428
## rs12m1 0.033955 1.034538 0.445166 0.076 0.93920
## rs30d1 -0.194771 0.823023 0.565199 -0.345 0.73039
## abdpain1 0.229308 1.257729 0.156236 1.468 0.14219
## discharge1 0.114691 1.121527 0.114283 1.004 0.31559
## dysuria1 0.164089 1.178320 0.155157 1.058 0.29025
## condomsometime -0.064082 0.937928 0.239642 -0.267 0.78916
## condomnever -0.321968 0.724721 0.247790 -1.299 0.19382
## itch1 -0.147451 0.862905 0.154774 -0.953 0.34075
## lesion1 -0.183670 0.832210 0.333523 -0.551 0.58184
## rash1 0.008298 1.008333 0.392928 0.021 0.98315
## lymph1 -0.030840 0.969631 0.547496 -0.056 0.95508
## vagina1 0.351398 1.421053 0.174868 2.010 0.04448 *
## dchexam1 -0.463338 0.629180 0.230020 -2.014 0.04397 *
## abnode1 0.170712 1.186149 0.433788 0.394 0.69392
## age 0.008096 1.008129 0.013913 0.582 0.56066
## yschool -0.128072 0.879790 0.039366 -3.253 0.00114 **
## npartner 0.076670 1.079686 0.053888 1.423 0.15480
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia 0.7156 1.3974 0.5337 0.9595
## iinfctboth 0.7653 1.3067 0.5704 1.0268
## maritalMarried 1.0566 0.9464 0.4537 2.4606
## maritalSingle 1.5040 0.6649 0.8430 2.6830
## raceWhite 0.8945 1.1179 0.6781 1.1800
## os12m1 0.8137 1.2289 0.5370 1.2330
## os30d1 0.7118 1.4049 0.4459 1.1363
## rs12m1 1.0345 0.9666 0.4323 2.4756
## rs30d1 0.8230 1.2150 0.2718 2.4918
## abdpain1 1.2577 0.7951 0.9260 1.7083
## discharge1 1.1215 0.8916 0.8965 1.4031
## dysuria1 1.1783 0.8487 0.8693 1.5971
## condomsometime 0.9379 1.0662 0.5864 1.5002
## condomnever 0.7247 1.3798 0.4459 1.1778
## itch1 0.8629 1.1589 0.6371 1.1687
## lesion1 0.8322 1.2016 0.4328 1.6000
## rash1 1.0083 0.9917 0.4668 2.1780
## lymph1 0.9696 1.0313 0.3316 2.8355
## vagina1 1.4211 0.7037 1.0087 2.0020
## dchexam1 0.6292 1.5894 0.4008 0.9876
## abnode1 1.1861 0.8431 0.5069 2.7758
## age 1.0081 0.9919 0.9810 1.0360
## yschool 0.8798 1.1366 0.8145 0.9504
## npartner 1.0797 0.9262 0.9715 1.2000
##
## Concordance= 0.635 (se = 0.016 )
## Likelihood ratio test= 73.37 on 24 df, p=7e-07
## Wald test = 69.89 on 24 df, p=2e-06
## Score (logrank) test = 71.71 on 24 df, p=1e-06
## Single term deletions
##
## Model:
## surv_object ~ iinfct + marital + race + os12m + os30d + rs12m +
## rs30d + abdpain + discharge + dysuria + condom + itch + lesion +
## rash + lymph + vagina + dchexam + abnode + age + yschool +
## npartner
## Df AIC LRT Pr(>Chi)
## <none> 4121.2
## iinfct 2 4122.2 4.9877 0.082590 .
## marital 2 4119.8 2.6535 0.265345
## race 1 4119.8 0.6300 0.427371
## os12m 1 4120.2 0.9869 0.320512
## os30d 1 4121.1 1.9585 0.161675
## rs12m 1 4119.2 0.0058 0.939450
## rs30d 1 4119.3 0.1175 0.731758
## abdpain 1 4121.2 2.0656 0.150654
## discharge 1 4120.2 1.0055 0.315972
## dysuria 1 4120.2 1.0820 0.298256
## condom 2 4122.2 5.0381 0.080535 .
## itch 1 4120.1 0.9324 0.334250
## lesion 1 4119.5 0.3156 0.574248
## rash 1 4119.2 0.0004 0.983170
## lymph 1 4119.2 0.0032 0.954914
## vagina 1 4122.9 3.7409 0.053095 .
## dchexam 1 4122.8 3.6128 0.057335 .
## abnode 1 4119.3 0.1482 0.700309
## age 1 4119.5 0.3321 0.564436
## yschool 1 4129.7 10.5087 0.001188 **
## npartner 1 4121.0 1.8274 0.176434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cox.model = coxph(surv_object ~ iinfct+condom+vagina
+dchexam+yschool, data = std)
summary(cox.model)
## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam +
## yschool, data = std)
##
## n= 877, number of events= 347
##
## coef exp(coef) se(coef) z Pr(>|z|)
## iinfctchlamydia -0.38468 0.68067 0.14618 -2.632 0.00850 **
## iinfctboth -0.24681 0.78129 0.14561 -1.695 0.09006 .
## condomnever -0.29779 0.74245 0.11551 -2.578 0.00994 **
## vagina1 0.40660 1.50171 0.16739 2.429 0.01514 *
## dchexam1 -0.37042 0.69044 0.22123 -1.674 0.09405 .
## yschool -0.14357 0.86626 0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia 0.6807 1.4692 0.5111 0.9065
## iinfctboth 0.7813 1.2799 0.5873 1.0393
## condomnever 0.7425 1.3469 0.5920 0.9311
## vagina1 1.5017 0.6659 1.0817 2.0848
## dchexam1 0.6904 1.4483 0.4475 1.0652
## yschool 0.8663 1.1544 0.8115 0.9247
##
## Concordance= 0.603 (se = 0.017 )
## Likelihood ratio test= 45.32 on 6 df, p=4e-08
## Wald test = 46.51 on 6 df, p=2e-08
## Score (logrank) test = 46.77 on 6 df, p=2e-08
cox.modelNoSchool <- coxph(surv_object ~ iinfct+condom+vagina+os30d+abdpain, data = std)
martNoSchool <- residuals(cox.modelNoSchool, type = "martingale")
# We plot martingale residuals to see if transformation is appropriate
lowessOBJ <- as.data.frame(lowess(std$yschool, martNoSchool))
ggplotly(
ggplot() + aes(std$yschool, martNoSchool) + geom_point() +
labs(x = "Years of School", y = "Martingale Residuals", title = "Martingale Residuals vs. Years of School") +
geom_line(data = lowessOBJ, aes(x = x, y = y), col = "#6495ed")
)
## chisq df p
## iinfct 3.5184 2 0.17
## condom 1.0539 1 0.30
## vagina 1.2835 1 0.26
## dchexam 0.4085 1 0.52
## yschool 0.0214 1 0.88
## GLOBAL 6.1176 6 0.41
## obs race marital age yschool iinfct npartner os12m os30d
## 11 11 Black Divorced/Separated 32 12 both 6 1 1
## 366 366 White Single 14 9 gonorrhea 1 0 0
## 525 525 Black Single 15 8 both 1 1 1
## 831 831 White Single 20 12 chlamydia 10 1 1
## rs12m rs30d abdpain discharge dysuria condom itch lesion rash lymph vagina
## 11 0 0 1 1 0 use 0 0 0 0 1
## 366 0 0 0 0 1 use 0 0 0 0 0
## 525 0 0 0 1 0 never 0 0 0 0 1
## 831 1 1 0 1 0 use 1 0 0 0 1
## dchexam abnode rinfct time
## 11 1 0 0 1468
## 366 1 0 0 1439
## 525 1 0 0 1005
## 831 0 0 0 1027